package org.shj.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

/**
 * 取样操作
 * @author Administrator
 *
 */
public class SampleOperator {

	public static void main(String[] args) {
		SparkConf conf = new SparkConf().setMaster("local").setAppName("SampleOperator");
		JavaSparkContext sc = new JavaSparkContext(conf);
		
		List<String> list = Arrays.asList("zhuyin01","zhuyin02","zhuyin03","zhuyin04","zhuyin05","zhuyin06",
				"zhuyin07","zhuyin08","zhuyin09","zhuyin10","zhuyin11","zhuyin12");
		
		JavaRDD<String> rdd = sc.parallelize(list, 2);//设置有2个并行的task
		
		rdd.sample(false, 0.33).foreach(new VoidFunction<String>(){
			private static final long serialVersionUID = 5903846729813844694L;

			public void call(String t) throws Exception {
				System.out.println(t);
			}			
		});

		sc.close();
	}

}
